{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 摩尔斯编解码字典"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "morse_dict = {\n",
    "    'A': '·-',\n",
    "    'B': '-···',\n",
    "    'C': '-·-·',\n",
    "    'D': '-··',\n",
    "    'E': '·',\n",
    "    'F': '··-·',\n",
    "    'G': '--·',\n",
    "    'H': '····',\n",
    "    'I': '··',\n",
    "    'J': '·---',\n",
    "    'K': '-·-',\n",
    "    'L': '·-··',\n",
    "    'M': '--',\n",
    "    'N': '-·',\n",
    "    'O': '---',\n",
    "    'P': '·--·',\n",
    "    'Q': '--·-',\n",
    "    'R': '·-·',\n",
    "    'S': '···',\n",
    "    'T': '-',\n",
    "    'U': '··-',\n",
    "    'V': '···-',\n",
    "    'W': '·--',\n",
    "    'X': '-··-',\n",
    "    'Y': '-·--',\n",
    "    'Z': '--··',\n",
    "    '1': '·----',\n",
    "    '2': '··---',\n",
    "    '3': '···--',\n",
    "    '4': '····-',\n",
    "    '5': '·····',\n",
    "    '6': '-····',\n",
    "    '7': '--···',\n",
    "    '8': '---··',\n",
    "    '9': '----·',\n",
    "    '0': '-----',\n",
    "    '.': '·-·-·-',\n",
    "    ':': '---···',\n",
    "    ',': '--··--',\n",
    "    ';': '-·-·-·',\n",
    "    '?': '··--··',\n",
    "    '=': '-···-',\n",
    "    \"'\": '·----·',\n",
    "    '/': '-··-·',\n",
    "    '!': '-·-·--',\n",
    "    '-': '-····-',\n",
    "    '_': '··--·-',\n",
    "    '\"': '·-··-·',\n",
    "    '(': '-·--·',\n",
    "    ')': '-·--·-',\n",
    "    '$': '···-··-',\n",
    "    '&': '·-···',\n",
    "    '@': '·--·-·',\n",
    "    '+': '·-·-·'\n",
    "}\n",
    "morse_dict_r = {\n",
    "    '·-': 'A',\n",
    "    '-···': 'B',\n",
    "    '-·-·': 'C',\n",
    "    '-··': 'D',\n",
    "    '·': 'E',\n",
    "    '··-·': 'F',\n",
    "    '--·': 'G',\n",
    "    '····': 'H',\n",
    "    '··': 'I',\n",
    "    '·---': 'J',\n",
    "    '-·-': 'K',\n",
    "    '·-··': 'L',\n",
    "    '--': 'M',\n",
    "    '-·': 'N',\n",
    "    '---': 'O',\n",
    "    '·--·': 'P',\n",
    "    '--·-': 'Q',\n",
    "    '·-·': 'R',\n",
    "    '···': 'S',\n",
    "    '-': 'T',\n",
    "    '··-': 'U',\n",
    "    '···-': 'V',\n",
    "    '·--': 'W',\n",
    "    '-··-': 'X',\n",
    "    '-·--': 'Y',\n",
    "    '--··': 'Z',\n",
    "    '·----': '1',\n",
    "    '··---': '2',\n",
    "    '···--': '3',\n",
    "    '····-': '4',\n",
    "    '·····': '5',\n",
    "    '-····': '6',\n",
    "    '--···': '7',\n",
    "    '---··': '8',\n",
    "    '----·': '9',\n",
    "    '-----': '0',\n",
    "    '·-·-·-': '.',\n",
    "    '---···': ':',\n",
    "    '--··--': ',',\n",
    "    '-·-·-·': ';',\n",
    "    '··--··': '?',\n",
    "    '-···-': '=',\n",
    "    '·----·': \"'\",\n",
    "    '-··-·': '/',\n",
    "    '-·-·--': '!',\n",
    "    '-····-': '-',\n",
    "    '··--·-': '_',\n",
    "    '·-··-·': '\"',\n",
    "    '-·--·': '(',\n",
    "    '-·--·-': ')',\n",
    "    '···-··-': '$',\n",
    "    '·-···': '&',\n",
    "    '·--·-·': '@',\n",
    "    '·-·-·': '+'\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 莫尔斯编解码类实现"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "class morseCodec:\n",
    "    \"\"\"Morse Encoding and Decoding\n",
    "    \"\"\"\n",
    "    def __init__(self):\n",
    "        self.morse_dict = {\n",
    "            'A': '·-',\n",
    "            'B': '-···',\n",
    "            'C': '-·-·',\n",
    "            'D': '-··',\n",
    "            'E': '·',\n",
    "            'F': '··-·',\n",
    "            'G': '--·',\n",
    "            'H': '····',\n",
    "            'I': '··',\n",
    "            'J': '·---',\n",
    "            'K': '-·-',\n",
    "            'L': '·-··',\n",
    "            'M': '--',\n",
    "            'N': '-·',\n",
    "            'O': '---',\n",
    "            'P': '·--·',\n",
    "            'Q': '--·-',\n",
    "            'R': '·-·',\n",
    "            'S': '···',\n",
    "            'T': '-',\n",
    "            'U': '··-',\n",
    "            'V': '···-',\n",
    "            'W': '·--',\n",
    "            'X': '-··-',\n",
    "            'Y': '-·--',\n",
    "            'Z': '--··',\n",
    "            '1': '·----',\n",
    "            '2': '··---',\n",
    "            '3': '···--',\n",
    "            '4': '····-',\n",
    "            '5': '·····',\n",
    "            '6': '-····',\n",
    "            '7': '--···',\n",
    "            '8': '---··',\n",
    "            '9': '----·',\n",
    "            '0': '-----',\n",
    "            '.': '·-·-·-',\n",
    "            ':': '---···',\n",
    "            ',': '--··--',\n",
    "            ';': '-·-·-·',\n",
    "            '?': '··--··',\n",
    "            '=': '-···-',\n",
    "            \"'\": '·----·',\n",
    "            '/': '-··-·',\n",
    "            '!': '-·-·--',\n",
    "            '-': '-····-',\n",
    "            '_': '··--·-',\n",
    "            '\"': '·-··-·',\n",
    "            '(': '-·--·',\n",
    "            ')': '-·--·-',\n",
    "            '$': '···-··-',\n",
    "            '&': '·-···',\n",
    "            '@': '·--·-·',\n",
    "            '+': '·-·-·'\n",
    "        }\n",
    "        self.morse_dict_r = {\n",
    "            '·-': 'A',\n",
    "            '-···': 'B',\n",
    "            '-·-·': 'C',\n",
    "            '-··': 'D',\n",
    "            '·': 'E',\n",
    "            '··-·': 'F',\n",
    "            '--·': 'G',\n",
    "            '····': 'H',\n",
    "            '··': 'I',\n",
    "            '·---': 'J',\n",
    "            '-·-': 'K',\n",
    "            '·-··': 'L',\n",
    "            '--': 'M',\n",
    "            '-·': 'N',\n",
    "            '---': 'O',\n",
    "            '·--·': 'P',\n",
    "            '--·-': 'Q',\n",
    "            '·-·': 'R',\n",
    "            '···': 'S',\n",
    "            '-': 'T',\n",
    "            '··-': 'U',\n",
    "            '···-': 'V',\n",
    "            '·--': 'W',\n",
    "            '-··-': 'X',\n",
    "            '-·--': 'Y',\n",
    "            '--··': 'Z',\n",
    "            '·----': '1',\n",
    "            '··---': '2',\n",
    "            '···--': '3',\n",
    "            '····-': '4',\n",
    "            '·····': '5',\n",
    "            '-····': '6',\n",
    "            '--···': '7',\n",
    "            '---··': '8',\n",
    "            '----·': '9',\n",
    "            '-----': '0',\n",
    "            '·-·-·-': '.',\n",
    "            '---···': ':',\n",
    "            '--··--': ',',\n",
    "            '-·-·-·': ';',\n",
    "            '··--··': '?',\n",
    "            '-···-': '=',\n",
    "            '·----·': \"'\",\n",
    "            '-··-·': '/',\n",
    "            '-·-·--': '!',\n",
    "            '-····-': '-',\n",
    "            '··--·-': '_',\n",
    "            '·-··-·': '\"',\n",
    "            '-·--·': '(',\n",
    "            '-·--·-': ')',\n",
    "            '···-··-': '$',\n",
    "            '·-···': '&',\n",
    "            '·--·-·': '@',\n",
    "            '·-·-·': '+'\n",
    "        }\n",
    "        \n",
    "    def encode(self, src_text):\n",
    "        result = ''\n",
    "        src_text = src_text.upper()\n",
    "        for single_char in src_text:\n",
    "            result += self.morse_dict.get(single_char, '\\\\') + ' '\n",
    "        return result\n",
    "\n",
    "    def decode(self, morse_txt):\n",
    "        result = ''\n",
    "        for seg in morse_txt.split():\n",
    "            result += self.morse_dict_r.get(seg, ' ')\n",
    "        return result"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 平滑莫尔斯编码编解码类(未完成)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class smoothMorseCodec(morseCodec):\n",
    "    def __init__(self):\n",
    "        morseCodec.__init__(self)\n",
    "        pass\n",
    "    \n",
    "    def encode(self, src_text):\n",
    "        result = ''\n",
    "        src_text = src_text.upper()\n",
    "        for single_char in src_text:\n",
    "            result += self.morse_dict.get(single_char, '\\\\')\n",
    "        return result\n",
    "    \n",
    "    def decode(self, morse_txt):\n",
    "        result = ''\n",
    "        for seg in morse_txt.split('\\\\'):\n",
    "            result += self.morse_dict_r.get(seg, ' ')\n",
    "        return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'···-··---···-·-······-----·-··-··-··'"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "my_smooth_morse = smoothMorseCodec()\n",
    "my_smooth_morse.encode('I love the world').replace('\\\\', '')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 递归方式实现平滑莫尔斯电码片段的解码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "max_moorse_len = max([len(k) for k in morse_dict_r])\n",
    "\n",
    "def guess_morse(morse_seg, cur_morse, cur_start, result):\n",
    "    for end in range(cur_start + 1, min(cur_start + max_moorse_len + 1, len(morse_seg) + 1)):\n",
    "        single_char = morse_dict_r.get(morse_seg[cur_start:end], None) #对切片尝试进行解码\n",
    "        if single_char:\n",
    "            if end == len(morse_seg):  #解码至串末尾递归结束\n",
    "                result.append(cur_morse + single_char)\n",
    "                break\n",
    "            else:\n",
    "                guess_morse(morse_seg, cur_morse + single_char, end, result) #递归解码剩余部分\n",
    "    if cur_start == 0:\n",
    "        return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = guess_morse('·--··-······-·-·--··----·', '', 0, [])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8023840"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(result)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 用词典进行筛选"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "word_set = {}\n",
    "num = 0\n",
    "with open('google-10000-english.txt') as f:\n",
    "    for word in f.read().split():\n",
    "        num += 1\n",
    "        word_set[word.upper()] = num"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{1, 2, 3}"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "set([1,2,2,3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "466551"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(word_set)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### 四种方式实现筛选"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PRESENTATION 2161\n"
     ]
    }
   ],
   "source": [
    "for word in result: #循环遍历方式\n",
    "    if word in word_set:\n",
    "        print(word, word_set[word])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = [(3213,'love'), (2420,'hello')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(2420, 'hello'), (3213, 'love')]"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sorted(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['AUGER', 'AUGITE', 'RAGER', 'LTZEN', 'LOIR', 'LOVE']"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[word for word in result if word in word_set]  #列表推导"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['AUGER', 'AUGITE', 'RAGER', 'LTZEN', 'LOIR', 'LOVE']"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list(filter(lambda x: x in word_set, result))  #利用filter()函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'AUGER', 'AUGITE', 'LOIR', 'LOVE', 'LTZEN', 'RAGER'}"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "set(result) & word_set   #利用集合运算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['ETEETTTEEETE',\n",
       " 'ETEETTTEEEN',\n",
       " 'ETEETTTEEAE',\n",
       " 'ETEETTTEER',\n",
       " 'ETEETTTEITE',\n",
       " 'ETEETTTEIN',\n",
       " 'ETEETTTEUE',\n",
       " 'ETEETTTEF',\n",
       " 'ETEETTTIETE',\n",
       " 'ETEETTTIEN']"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_word_dict = {}\n",
    "my_smooth_morse = smoothMorseCodec()\n",
    "\n",
    "with open('words.txt') as f:\n",
    "    for word in f.read().split():\n",
    "        all_word_dict[my_smooth_morse.encode(word)] = word.upper()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "def guess_morse_new(morse_seg):\n",
    "    if morse_seg in all_word_dict:\n",
    "        print(all_word_dict[morse_seg])\n",
    "    else:\n",
    "        print('N/A')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PRESENTATION\n"
     ]
    }
   ],
   "source": [
    "guess_morse_new('·--··-······-·-·--··----·')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "word_dict = {}\n",
    "with open('google-10000-english.txt') as f:\n",
    "    for word in f.read().split()[:3000]:\n",
    "        word_dict[my_smooth_morse.encode(word)] = word"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### 添加缺失单词"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "word_dict[my_smooth_morse.encode('i')] = 'i'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 单词级递归解码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "max_moorse_len = max([len(k) for k in word_dict])\n",
    "min_moorse_len = min([len(k) for k in word_dict])\n",
    "\n",
    "def guess_morse(morse_seg, cur_morse, cur_start, result, depth):\n",
    "    for end in range(cur_start + min_moorse_len, min(cur_start + max_moorse_len + 1, len(morse_seg) + 1)):\n",
    "        single_word = word_dict.get(morse_seg[cur_start:end], None) #对切片尝试进行解码\n",
    "        if single_word:\n",
    "            if end == len(morse_seg):  #解码至串末尾递归结束\n",
    "                result.append(cur_morse + ' ' + single_word)\n",
    "                break\n",
    "            elif depth <= 7:\n",
    "                guess_morse(morse_seg, cur_morse + ' ' + single_word, end, result, depth + 1) #递归解码剩余部分\n",
    "    if cur_start == 0:\n",
    "        return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = guess_morse('···-··---···-·-·-----··-', '', 0, [], 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "272337"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "result_ = []\n",
    "for item in result:\n",
    "    result_.append((len(item.split()), item))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "result_ = sorted(result_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 117,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'I' in word_dict.values()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(3, ' i love you'),\n",
       " (4, ' e e love you'),\n",
       " (4, ' e fat dr you'),\n",
       " (4, ' eu eat dr you'),\n",
       " (4, ' eu u msn you'),\n",
       " (4, ' i aim dr you'),\n",
       " (4, ' i au msn you'),\n",
       " (4, ' i aug f you'),\n",
       " (4, ' i los co ga'),\n",
       " (4, ' i los com u'),\n",
       " (4, ' i los come et'),\n",
       " (4, ' i los n you'),\n",
       " (4, ' i lost e you'),\n",
       " (4, ' i lost rom u'),\n",
       " (4, ' i love km ga'),\n",
       " (4, ' i love not tea'),\n",
       " (4, ' i love two u'),\n",
       " (4, ' ie name f you'),\n",
       " (4, ' ie nj scott u'),\n",
       " (4, ' ie tea msn you'),\n",
       " (4, ' ie teams co ga'),\n",
       " (4, ' ie teams com u'),\n",
       " (4, ' ie teams come et'),\n",
       " (4, ' ie teams n you'),\n",
       " (4, ' ie tim dr you'),\n",
       " (4, ' sd m dr you'),\n",
       " (4, ' sd md en you'),\n",
       " (4, ' sd mt scott u'),\n",
       " (4, ' sd os co ga'),\n",
       " (4, ' sd os com u')]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_[:30]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 用递归实现反向最大匹配解码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "max_moorse_len = max([len(k) for k in word_dict])\n",
    "min_moorse_len = min([len(k) for k in word_dict])\n",
    "\n",
    "def guess_morse_reverse_max_len(morse_seg, if_add_space=False):\n",
    "    if morse_seg == '':\n",
    "        return ''\n",
    "    else:\n",
    "        for start in range(max(len(morse_seg) - max_moorse_len, 0), len(morse_seg)):\n",
    "            single_word = word_dict.get(morse_seg[start:], None)\n",
    "            if single_word:\n",
    "                break\n",
    "        print(morse_seg[:start], single_word)\n",
    "        return guess_morse_reverse_max_len(morse_seg[:start], True) + single_word + (' ' if if_add_space else '')\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "···-··---···-·-····· world\n",
      "···-··---·· ends\n",
      "···-· az\n",
      "· f\n",
      " e\n"
     ]
    }
   ],
   "source": [
    "result = guess_morse_reverse_max_len('···-··---···-·-······-----·-··-··-··')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'e f az ends world'"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "·· love\n"
     ]
    }
   ],
   "source": [
    "result = guess_morse_reverse_max_len('···-··---···-·')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " i\n"
     ]
    }
   ],
   "source": [
    "result = guess_morse_reverse_max_len('··')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "38"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "max_moorse_len"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "morse_seg = ''.join([chr(i) for i in range(ord('A'), ord('z')+1)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "58"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(morse_seg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "UVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz 38\n",
      "VWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz 37\n",
      "WXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz 36\n",
      "XYZ[\\]^_`abcdefghijklmnopqrstuvwxyz 35\n",
      "YZ[\\]^_`abcdefghijklmnopqrstuvwxyz 34\n",
      "Z[\\]^_`abcdefghijklmnopqrstuvwxyz 33\n",
      "[\\]^_`abcdefghijklmnopqrstuvwxyz 32\n",
      "\\]^_`abcdefghijklmnopqrstuvwxyz 31\n",
      "]^_`abcdefghijklmnopqrstuvwxyz 30\n",
      "^_`abcdefghijklmnopqrstuvwxyz 29\n",
      "_`abcdefghijklmnopqrstuvwxyz 28\n",
      "`abcdefghijklmnopqrstuvwxyz 27\n",
      "abcdefghijklmnopqrstuvwxyz 26\n",
      "bcdefghijklmnopqrstuvwxyz 25\n",
      "cdefghijklmnopqrstuvwxyz 24\n",
      "defghijklmnopqrstuvwxyz 23\n",
      "efghijklmnopqrstuvwxyz 22\n",
      "fghijklmnopqrstuvwxyz 21\n",
      "ghijklmnopqrstuvwxyz 20\n",
      "hijklmnopqrstuvwxyz 19\n",
      "ijklmnopqrstuvwxyz 18\n",
      "jklmnopqrstuvwxyz 17\n",
      "klmnopqrstuvwxyz 16\n",
      "lmnopqrstuvwxyz 15\n",
      "mnopqrstuvwxyz 14\n",
      "nopqrstuvwxyz 13\n",
      "opqrstuvwxyz 12\n",
      "pqrstuvwxyz 11\n",
      "qrstuvwxyz 10\n",
      "rstuvwxyz 9\n",
      "stuvwxyz 8\n",
      "tuvwxyz 7\n",
      "uvwxyz 6\n",
      "vwxyz 5\n",
      "wxyz 4\n",
      "xyz 3\n",
      "yz 2\n",
      "z 1\n"
     ]
    }
   ],
   "source": [
    "for start in range(max(len(morse_seg) - max_moorse_len, 0), len(morse_seg)):\n",
    "    print(morse_seg[start:], len(morse_seg[start:]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "req = requests.get('https://wap.zol.com.cn/top/cell_phone/hot.html')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "c"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 用正则表达式实现信息抽取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = re.findall(\n",
    "    '<p class=\"pro-info-name f28\">(.*?)<\\/p>[\\S\\s]*?<span class=\"pro-info-price f24\">(.*?)<\\/span>',\n",
    "    req.text\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 保存爬取信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('mobile.txt', 'w') as f:\n",
    "    for item in result:\n",
    "        f.write(item[0] + ' ' + item[1] + '\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OPPO Ace2（8GB/128GB/全网通/5G版） ￥3999\n",
      "iQOO Neo3（6GB/128GB/全网通/5G版） ￥2698\n",
      "华为P40 Pro（8GB/128GB/全网通/5G版） ￥5988\n",
      "一加8 Pro（8GB/128GB/全网通/5G版） ￥5399\n",
      "荣耀30 Pro+（8GB/256GB/全网通/5G版） ￥4999\n",
      "魅族17（8GB/128GB/全网通/5G版） ￥3699\n",
      "华为nova 7（8GB/128GB/5G版/全网通） ￥2999\n",
      "OPPO Find X2 Pro（12GB/256GB/全网通/5G版） ￥6999\n",
      "苹果iPhone SE 2（3GB/64GB/全网通） ￥3299\n",
      "苹果iPhone 11（4GB/64GB/全网通） ￥5499\n",
      "小米10（8GB/128GB/全网通） ￥3999\n",
      "一加8（8GB/128GB/全网通/5G版） ￥3999\n",
      "vivo S6（8GB/128GB/全网通/5G版） ￥2698\n",
      "华为P40（6GB/128GB/全网通/5G版） ￥4188\n",
      "荣耀30（6GB/128GB/全网通/5G版） ￥2999\n",
      "华为nova 7 Pro（8GB/128GB/5G版/全网通） ￥3699\n",
      "OPPO A92s（6GB/128GB/全网通/5G版） ￥2199\n",
      "vivo Z6（6GB/128GB/全网通/5G版） ￥2198\n",
      "华为nova 6 5G（8GB/128GB/全网通） ￥3499\n",
      "华为P40（8GB/128GB/全网通/5G版） ￥4488\n",
      "荣耀30S（8GB/128GB/全网通/5G版） ￥2399\n",
      "华为Mate30 Pro（8GB/256GB/全网通/5G版/玻璃版） ￥6399\n",
      "OPPO Find X2（8GB/128GB/全网通/5G版） ￥5499\n",
      "小米10 Pro（8GB/256GB/全网通） ￥4999\n",
      "荣耀9X（4GB/64GB/全网通） ￥1199\n",
      "华为Mate30（8GB/128GB/全网通/5G版/玻璃版） ￥4499\n",
      "魅族16T（6GB/128GB/全网通） ￥1999\n",
      "苹果iPhone SE 2（3GB/128GB/全网通） ￥3799\n",
      "苹果iPhone 11 Pro Max（4GB/64GB/全网通） ￥9599\n",
      "vivo S5（8GB/128GB/全网通） ￥1998\n",
      "苹果iPhone 11（4GB/128GB/全网通） ￥5999\n",
      "华为P30（8GB RAM/全网通） ￥2888\n",
      "华为P30 Pro（8GB/128GB/全网通） ￥4488\n",
      "苹果iPhone XR（全网通） ￥4399\n",
      "苹果iPhone X（全网通） ￥5999\n",
      "苹果iPhone XS Max（全网通） ￥6299\n",
      "苹果iPhone 8 Plus（全网通） ￥3999\n",
      "三星Galaxy S20（12GB/128GB/全网通） ￥6999\n",
      "苹果iPhone SE 2（3GB/256GB/全网通） ￥4599\n",
      "三星Galaxy S20 Ultra（12GB/256GB/全网通） ￥9999\n",
      "苹果iPhone 11 Pro（4GB/64GB/全网通） ￥8699\n",
      "苹果iPhone 8（全网通） ￥3578\n",
      "苹果iPhone 7 Plus（全网通） ￥2799\n",
      "苹果iPhone 7（全网通） ￥3799\n",
      "苹果iPhone 2020版（5G版全网通） 概念产品\n",
      "苹果iPhone XS（全网通） ￥5099\n",
      "苹果iPhone 11 Pro Max（4GB/256GB/全网通） ￥10899\n",
      "OPPO A52（8GB/128GB/全网通） ￥1599\n",
      "华为nova 5 Pro（8GB/128GB/全网通） ￥2499\n",
      "vivo NEX 3S（8GB/256GB/全网通/5G版） ￥4998\n"
     ]
    }
   ],
   "source": [
    "!cat mobile.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
